require(Hmisc)
knitrSet(lang='markdown')
require(rms)
options(prType='html')
getHdata(FEV)
html(contents(FEV))
| Name | Units | Levels | Storage |
|---|---|---|---|
| id | integer | ||
| age | years | integer | |
| fev | liters | double | |
| height | inches | double | |
| sex | 2 | integer | |
| smoke | 2 | integer |
options(grType='plotly') # use plotly interactive graphics
plot(describe(FEV))
$Categorical
$Continuous
require(plotly, quietly=TRUE)
ggplotly(ggplot(FEV, aes(x=age, y=fev, color=height)) + geom_point() + facet_grid(smoke ~ sex))
dd <- datadist(FEV); options(datadist='dd')
htmlVerbatim(dd) # in Hmisc package
id age fev height sex smoke
Low:effect 15811.0000 8 1.981000 57.0
Adjust to 36071.0000 10 2.547500 61.5 male non-current smoker
High:effect 53638.5000 12 3.118500 65.5
Low:prediction 600.2355 4 1.252128 49.0 female non-current smoker
High:prediction 81741.1529 17 4.789642 72.0 male current smoker
Low 201.0000 3 0.791000 46.0 female non-current smoker
High 90001.0000 19 5.793000 74.0 male current smoker
Values:
sex : female male
smoke : non-current smoker current smoker
FEV is log-transformed to satisfy normality and equal variance assumptions.
f <- ols(log(fev) ~ age, data=FEV)
f
ols(formula = log(fev) ~ age, data = FEV)
|
Model Likelihood Ratio Test |
Discrimination Indexes |
|
|---|---|---|
| Obs 654 | LR χ2 592.40 | R2 0.596 |
| σ 0.2120 | d.f. 1 | R2adj 0.595 |
| d.f. 652 | Pr(>χ2) 0.0000 | g 0.287 |
Min 1Q Median 3Q Max
-0.72047 -0.13752 0.00302 0.14681 0.60267
| β | S.E. | t | Pr(>|t|) | |
|---|---|---|---|---|
| Intercept | 0.0506 | 0.0291 | 1.74 | 0.0826 |
| age | 0.0871 | 0.0028 | 31.00 | <0.0001 |
r <- as.numeric(resid(f))
with(FEV, plot(age, r))
qqnorm(r)
qqline(r)
# Show algebraic form of fitted model
g <- Function(f)
htmlVerbatim(g, exp(g()), exp(g(age=10))) # Evaluate the fitted model, antilog to get original scale
function (age = 10)
{
0.050596002 + 0.087083296 * age
}
[1] 2.512879
[1] 2.512879
latex(f, md=TRUE)
\[{\rm E({\rm log(fev)}}) = X\beta, {\rm \ \ where} \\ \]
\begin{eqnarray*}
X\hat{\beta}= & & \\
& & 0.050596 +0.0870833\:{\rm age} \\
\end{eqnarray*}
ggplot(Predict(f), addlayer=geom_point(aes(x=age, y=log(fev), color=sex), FEV))
The knots are at these quantiles of age: 0.1, 0.5, 0.9.
f <- ols(log(fev) ~ rcs(age, 3), data=FEV, x=TRUE)
print(f)
ols(formula = log(fev) ~ rcs(age, 3), data = FEV, x = TRUE)
| Model Likelihood Ratio Test |
Discrimination Indexes |
|
|---|---|---|
| Obs 654 | LR χ2 668.33 | R2 0.640 |
| σ 0.2002 | d.f. 2 | R2adj 0.639 |
| d.f. 651 | Pr(>χ2) 0.0000 | g 0.297 |
Min 1Q Median 3Q Max
-0.608234 -0.134592 0.006764 0.139165 0.538798
| β | S.E. | t | Pr(>|t|) | |
|---|---|---|---|---|
| Intercept | -0.3377 | 0.0513 | -6.58 | <0.0001 |
| age | 0.1386 | 0.0063 | 21.88 | <0.0001 |
| age' | -0.0627 | 0.0070 | -8.95 | <0.0001 |
g <- Function(f)
htmlVerbatim(g(10:20), g(), g)
[1] 0.9852999 1.0660775 1.1292243 1.1806172 1.2261332 1.2706697 1.3152062 1.3597427
[9] 1.4042792 1.4488157 1.4933522
[1] 0.9852999
function (age = 10)
{
-0.33768719 + 0.13856744 * age - 0.00097948891 * pmax(age -
6, 0)^3 + 0.0019589778 * pmax(age - 10, 0)^3 - 0.00097948891 *
pmax(age - 14, 0)^3
}
ggplot(Predict(f))
ha <- function(f) print(anova(f), dec.ms=2, dec.ss=2)
ha(f)
Analysis of Variance for log(fev) | |||||
| d.f. | Partial SS | MS | F | P | |
|---|---|---|---|---|---|
| age | 2 | 46.42 | 23.21 | 578.90 | <0.0001 |
| Nonlinear | 1 | 3.21 | 3.21 | 80.14 | <0.0001 |
| REGRESSION | 2 | 46.42 | 23.21 | 578.90 | <0.0001 |
| ERROR | 651 | 26.10 | 0.04 | ||
f <- ols(log(fev) ~ rcs(age, 5), data=FEV)
f
ols(formula = log(fev) ~ rcs(age, 5), data = FEV)
|
Model Likelihood Ratio Test |
Discrimination Indexes |
|
|---|---|---|
| Obs 654 | LR χ2 678.99 | R2 0.646 |
| σ 0.1989 | d.f. 4 | R2adj 0.644 |
| d.f. 649 | Pr(>χ2) 0.0000 | g 0.303 |
Min 1Q Median 3Q Max
-0.621716 -0.135183 0.008372 0.144535 0.543184
| β | S.E. | t | Pr(>|t|) | |
|---|---|---|---|---|
| Intercept | -0.1758 | 0.0985 | -1.78 | 0.0748 |
| age | 0.1126 | 0.0159 | 7.10 | <0.0001 |
| age’ | 0.0383 | 0.0817 | 0.47 | 0.6391 |
| age’’ | -0.2167 | 0.4077 | -0.53 | 0.5951 |
| age’’’ | -0.1496 | 0.9059 | -0.17 | 0.8689 |
Function(f)
function (age = 10) { -0.17580202 + 0.11261215 * age + 0.0003834753 * pmax(age - 5, 0)^3 - 0.0021674701 * pmax(age - 8, 0)^3 - 0.0014960677 * pmax(age - 10, 0)^3 + 0.0047044691 * pmax(age - 11, 0)^3 - 0.0014244066 * pmax(age - 15, 0)^3 } <environment: 0x55d2767e85f0>
ha(f)
Analysis of Variance for log(fev) | |||||
| d.f. | Partial SS | MS | F | P | |
|---|---|---|---|---|---|
| age | 4 | 46.85 | 11.71 | 295.97 | <0.0001 |
| Nonlinear | 3 | 3.64 | 1.21 | 30.62 | <0.0001 |
| REGRESSION | 4 | 46.85 | 11.71 | 295.97 | <0.0001 |
| ERROR | 649 | 25.68 | 0.04 | ||
ggplot(Predict(f))
f <- ols(log(fev) ~ rcs(age, 5) + sex, data=FEV)
Function(f)
function (age = 10, sex = “male”) { -0.23344699 + 0.11429274 * age + 0.00019963206 * pmax(age - 5, 0)^3 - 0.0010413455 * pmax(age - 8, 0)^3 - 0.0043083413 * pmax(age - 10, 0)^3 + 0.0067087011 * pmax(age - 11, 0)^3 - 0.0015586464 * pmax(age - 15, 0)^3 + 0.099501651 * (sex == “male”) } <environment: 0x55d27a5566d0>
ha(f)
Analysis of Variance for log(fev) | |||||
| d.f. | Partial SS | MS | F | P | |
|---|---|---|---|---|---|
| age | 4 | 46.37 | 11.59 | 312.11 | <0.0001 |
| Nonlinear | 3 | 3.68 | 1.23 | 32.99 | <0.0001 |
| sex | 1 | 1.61 | 1.61 | 43.40 | <0.0001 |
| REGRESSION | 5 | 48.46 | 9.69 | 260.92 | <0.0001 |
| ERROR | 648 | 24.07 | 0.04 | ||
ggplot(Predict(f, age, sex),
addlayer=geom_point(aes(x=age, y=log(fev), color=sex), FEV))
The following model allows for different shapes of effects for males and females.
f <- ols(log(fev) ~ rcs(age, 5) * sex, data=FEV)
r <- as.numeric(resid(f))
plot(fitted(f), r)
qqnorm(r); qqline(r)
Function(f)
function (age = 10, sex = “male”) { -0.33458936 + 0.13366998 * age - 2.9172721e-05 * pmax(age - 5, 0)^3 - 0.0039056026 * pmax(age - 8, 0)^3 + 0.0078871147 * pmax(age - 10, 0)^3 - 0.002951157 * pmax(age - 11, 0)^3 - 0.0010011823 * pmax(age - 15, 0)^3 + 0.34502774 * (sex == “male”) + (sex == “male”) * (-0.046273816 * age + 0.00093632418 * pmax(age - 5, 0)^3 + 0.0034513932 * pmax(age - 8, 0)^3 - 0.020568634 * pmax(age - 10, 0)^3 + 0.017330044 * pmax(age - 11, 0)^3 - 0.0011491273 * pmax(age - 15, 0)^3) } <environment: 0x55d2772d5b38>
ha(f)
Analysis of Variance for log(fev) | |||||
| d.f. | Partial SS | MS | F | P | |
|---|---|---|---|---|---|
| age (Factor+Higher Order Factors) | 8 | 48.31 | 6.04 | 175.70 | <0.0001 |
| All Interactions | 4 | 1.94 | 0.48 | 14.08 | <0.0001 |
| Nonlinear (Factor+Higher Order Factors) | 6 | 4.74 | 0.79 | 22.98 | <0.0001 |
| sex (Factor+Higher Order Factors) | 5 | 3.55 | 0.71 | 20.65 | <0.0001 |
| All Interactions | 4 | 1.94 | 0.48 | 14.08 | <0.0001 |
| age × sex (Factor+Higher Order Factors) | 4 | 1.94 | 0.48 | 14.08 | <0.0001 |
| Nonlinear | 3 | 0.82 | 0.27 | 7.96 | <0.0001 |
| Nonlinear Interaction : f(A,B) vs. AB | 3 | 0.82 | 0.27 | 7.96 | <0.0001 |
| TOTAL NONLINEAR | 6 | 4.74 | 0.79 | 22.98 | <0.0001 |
| TOTAL NONLINEAR + INTERACTION | 7 | 5.61 | 0.80 | 23.32 | <0.0001 |
| REGRESSION | 9 | 50.39 | 5.60 | 162.92 | <0.0001 |
| ERROR | 644 | 22.13 | 0.03 | ||
ggplot(Predict(f, age, sex))
Plot predicted median FEV by anti-logging predicted values.
ggplot(Predict(f, age, sex, fun=exp), ylab='Estimated Median FEV')
We show the joint age and height effects using a color image plot
f <- ols(log(fev) ~ rcs(age, 5) * sex + rcs(height, 5), data=FEV)
ha(f)
Analysis of Variance for log(fev) | |||||
| d.f. | Partial SS | MS | F | P | |
|---|---|---|---|---|---|
| age (Factor+Higher Order Factors) | 8 | 1.33 | 0.17 | 8.02 | <0.0001 |
| All Interactions | 4 | 0.36 | 0.09 | 4.39 | 0.0016 |
| Nonlinear (Factor+Higher Order Factors) | 6 | 0.44 | 0.07 | 3.51 | 0.0020 |
| sex (Factor+Higher Order Factors) | 5 | 0.60 | 0.12 | 5.77 | <0.0001 |
| All Interactions | 4 | 0.36 | 0.09 | 4.39 | 0.0016 |
| height | 4 | 8.88 | 2.22 | 107.27 | <0.0001 |
| Nonlinear | 3 | 0.17 | 0.06 | 2.77 | 0.0408 |
| age × sex (Factor+Higher Order Factors) | 4 | 0.36 | 0.09 | 4.39 | 0.0016 |
| Nonlinear | 3 | 0.33 | 0.11 | 5.37 | 0.0012 |
| Nonlinear Interaction : f(A,B) vs. AB | 3 | 0.33 | 0.11 | 5.37 | 0.0012 |
| TOTAL NONLINEAR | 9 | 0.58 | 0.06 | 3.12 | 0.0011 |
| TOTAL NONLINEAR + INTERACTION | 10 | 0.59 | 0.06 | 2.83 | 0.0019 |
| REGRESSION | 13 | 59.28 | 4.56 | 220.25 | <0.0001 |
| ERROR | 640 | 13.25 | 0.02 | ||
ggplot(Predict(f, age, sex), adj.subtitle=FALSE)
p <- Predict(f, age, height)
bplot(p)
p <- with(p, list(age=unique(age), height=unique(height),
Yhat=matrix(yhat, ncol=200)))
with(p, plot_ly(x=age, y=height, z=Yhat, type='heatmap'))
with(p, plot_ly(x=age, y=height, z=Yhat, type='surface'))
By plotting spike histograms showing the smoking-specific age distribution we see that there are almost no very young children who have smoked. This limits the power to detect an age by smoking interaction.
f <- ols(log(fev) ~ rcs(age, 5) + smoke, data=FEV)
ggplot(Predict(f, age, smoke), rdata=FEV)
R version 3.6.1 (2019-07-05)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 19.04
Matrix products: default
BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.8.0
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.8.0
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] plotly_4.9.0 rms_5.1-4 SparseM_1.77 Hmisc_4.2-1
[5] ggplot2_3.2.1 Formula_1.2-3 survival_2.44-1.1 lattice_0.20-38
R Core Team (2019). R: A Language and Environment for Statistical Computing. R Foundation for Statistical Computing, Vienna, Austria. <URL: https://www.R-project.org/>.